library(knitr)
# Global knitr chunk defaults: no comment prefix on output, 6 x 6 figures,
# results emitted "asis", and warnings/messages hidden.
opts_chunk$set(
  comment = NA,
  fig.width = 6,
  fig.height = 6,
  results = "asis",
  warning = FALSE,
  message = FALSE
)

Przygotowanie

library(ggplot2)
library(scales)
library(dplyr)
library(archivist)

# Register "arepo" as the local archivist repository; print.ggplot() below
# calls saveToRepo() without naming a repository.
# NOTE(review): presumably saveToRepo() then falls back to this default - confirm.
setLocalRepo("arepo")

# Quartile summary intended for stat_summary(fun.data = "q3").
#
# Args:
#   x: numeric vector. Missing values are dropped before the quantiles are
#      computed, so NA input no longer raises an error.
# Returns:
#   Named numeric vector: ymin = 1st quartile, y = median, ymax = 3rd quartile.
q3 <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
  names(quartiles) <- c("ymin", "y", "ymax")
  quartiles
}

# Overrides printing of ggplot objects: each printed plot is first stored in
# the archivist repository, a markdown "Load: ..." link with its hash is
# emitted, and then the regular ggplot2 print method draws the plot.
#
# Args:
#   x:   the ggplot object being printed.
#   ...: forwarded unchanged to ggplot2's own print method.
print.ggplot <- function(x, ...) {
  hash <- saveToRepo(x)
  load_call <- paste0("archivist::aread('pbiecek/Eseje/arepo/", hash, "')")
  rda_url <- paste0(
    "https://github.com/pbiecek/Eseje/raw/master/arepo/gallery/", hash, ".rda"
  )
  cat("Load: [`", load_call, "`](", rda_url, ")\n", sep = "")
  # Delegate to the non-exported ggplot2 method for the actual drawing.
  ggplot2:::print.ggplot(x, ...)
}

Dane

W tym skrypcie wykorzystujemy pakiet SmarterPoland i dostępne w nim zbiory danych `countries` oraz `maturaExam`.

# Attach the package providing the example data and preview `countries`.
library(SmarterPoland)
head(countries, n = 6L)
              country birth.rate death.rate population continent
1         Afghanistan       34.1        7.7      30552      Asia
2             Albania       12.9        9.4       3173    Europe
3             Algeria       24.3        5.7      39208    Africa
4             Andorra        8.9        8.4         79    Europe
5              Angola       44.1       13.9      21472    Africa
6 Antigua and Barbuda       16.5        6.8         90  Americas

# Preview the first rows of the `maturaExam` data set.
head(maturaExam, n = 6L)

  podstawowy.matematyka podstawowy.j.polski  rok
1                    19                  35 2010
2                    16                  43 2010
3                    25                  39 2010
4                    27                  35 2010
5                    27                  43 2010
6                    31                  42 2010

Warstwy

# Rows holding the lowest birth rate within each continent (ties are kept).
countriesMin <- countries %>%
  group_by(continent) %>%
  filter(birth.rate == min(birth.rate, na.rm = TRUE))

# Rows holding the highest birth rate within each continent (ties are kept).
countriesMax <- countries %>%
  group_by(continent) %>%
  filter(birth.rate == max(birth.rate, na.rm = TRUE))

# Layered plot: violins, quartile crossbars (via q3), jittered points, a rug
# on the left axis, and text labels for the per-continent extremes.
ggplot(data = countries,
       mapping = aes(x = continent, y = birth.rate, label = country)) +
  geom_violin(scale = "area", fill = "grey", colour = "white") +
  stat_summary(fun.data = "q3", geom = "crossbar", colour = "red", width = 0.4) +
  # Jitter horizontally only, so the plotted birth rates stay exact.
  geom_jitter(position = position_jitter(width = 0.25, height = 0), shape = 15) +
  geom_rug(sides = "l") +
  geom_text(data = countriesMin, vjust = 2, colour = "blue3") +
  geom_text(data = countriesMax, vjust = -1, colour = "blue3") +
  theme_bw() +
  xlab("") +
  theme(panel.grid.major.x = element_line(colour = "white"))

Load: archivist::aread('pbiecek/Eseje/arepo/e02ad04b68e55035371b3b99112cdf3c')

# Death rate against birth rate: 2D density contours, all points, the
# identity line y = x, and one highlighted row.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_density_2d(h = c(10, 10), colour = "grey") +
  geom_point() +
  coord_fixed() +
  geom_abline(intercept = 0, slope = 1) +
  # NOTE(review): row 132 is selected by position; which country it is cannot
  # be told from this script - confirm and consider selecting by name.
  geom_point(data = countries[132, ], colour = "red", size = 4) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/13d5c2cf07e277c07e776473b106a1b0')

Mapowania

# Basic mapping: birth rate on x, death rate on y, equal axis scaling.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_point() +
  coord_fixed() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/19794b19037daced210ff5c6d485d3be')

# Same scatter plot, with continent mapped to both colour and point shape.
ggplot(
  data = countries,
  mapping = aes(
    x = birth.rate,
    y = death.rate,
    colour = continent,
    shape = continent
  )
) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/4d7b9c309ad4875690da61c194f5ca1f')

# Bin population into five ordered classes used for the size aesthetic.
# NOTE(review): the labels suggest `population` is stored in thousands - confirm.
countries$populationCat <- cut(
  countries$population,
  breaks = c(1, 10^3, 10^4, 10^5, 10^6, 10^7),
  labels = c("< 1M", "< 10M", "< 100 M", "< 1 B", "> 1 B"),
  ordered_result = TRUE
)

# Scatter plot with continent on colour/shape and population class on size.
ggplot(
  data = countries,
  mapping = aes(
    x = birth.rate,
    y = death.rate,
    colour = continent,
    shape = continent,
    size = populationCat
  )
) +
  geom_point() +
  coord_fixed() +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/f7bee75d70a77bd9e1ae66323afe8f6e')

Formy / Geometrie

Punkty

# Dot plot: birth rates binned along y and stacked symmetrically per continent.
ggplot(data = countries, mapping = aes(x = continent, y = birth.rate)) +
  geom_dotplot(binaxis = "y", stackdir = "center", binwidth = 0.7) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/ccf21a147baebcf2498c7fc44f59e882')

# Scatter plot: death rate against birth rate, one point per row.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_point() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/4cc6027ddf39539bf985daefae7db7f5')

# Jittered points: birth rate per continent with horizontal jitter width 0.2
# (vertical jitter is left at position_jitter's default).
ggplot(data = countries, mapping = aes(x = continent, y = birth.rate)) +
  geom_jitter(position = position_jitter(width = 0.2)) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/7f97f57921664fcb7d30d927575dcb34')

# Different mappings: continent drawn as a letter-shaped point.
ggplot() +
  geom_point(
    data = countries,
    mapping = aes(x = birth.rate, y = death.rate, shape = continent),
    size = 4
  ) +
  theme_bw() +
  # One letter per continent level, assigned in level order.
  scale_shape_manual(values = c("F", "A", "S", "E", "O")) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/9603959bdb5fcde4916de25ac635783f')

# Letter-shaped points again, now also coloured by continent.
ggplot() +
  geom_point(
    data = countries,
    mapping = aes(
      x = birth.rate,
      y = death.rate,
      shape = continent,
      colour = continent
    ),
    size = 4
  ) +
  theme_bw() +
  scale_shape_manual(values = c("F", "A", "S", "E", "O")) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/0b7c872b34944871597cf7ee22a10124')

# Solid circles coloured by continent using a qualitative Brewer palette.
ggplot() +
  geom_point(
    data = countries,
    mapping = aes(x = birth.rate, y = death.rate, colour = continent),
    size = 4,
    shape = 19
  ) +
  theme_bw() +
  scale_color_brewer(type = "qual", palette = 6) +
  theme(legend.position = c(0.9, 0.17))

Load: archivist::aread('pbiecek/Eseje/arepo/69e4470a064eca4faf21ecb10a9f9994')

# Bubble plot: point size is mapped to population through a sqrt-transformed
# size scale; the legend is hidden.
ggplot() +
  geom_point(
    data = countries,
    mapping = aes(x = birth.rate, y = death.rate, size = population)
  ) +
  # `labels` is spelled out in full rather than relying on partial argument
  # matching of an abbreviated name.
  scale_size_continuous(
    trans = "sqrt",
    labels = comma,
    limits = c(0, 1500000)
  ) +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/0a05f31ead54caee9572292f385caed5')

Obszar

# Overlaid, semi-transparent birth-rate density curves, one per continent.
ggplot(data = countries, mapping = aes(x = birth.rate, fill = continent)) +
  geom_density(alpha = 0.5) +
  theme_bw() +
  theme(legend.position = c(0.85, 0.85))

Load: archivist::aread('pbiecek/Eseje/arepo/7929dd727c479b1c939b3d0c5729cdc4')

# Densities stacked with position "fill" and drawn without outlines.
ggplot(data = countries, mapping = aes(x = birth.rate, fill = continent)) +
  geom_density(position = "fill", colour = NA) +
  theme_bw() +
  theme(legend.position = "top")

Load: archivist::aread('pbiecek/Eseje/arepo/43e30649a593a48c27f143b257851755')

# Violin plot of birth rate per continent, filled by continent, no legend.
ggplot(
  data = countries,
  mapping = aes(x = continent, y = birth.rate, fill = continent)
) +
  geom_violin() +
  theme_bw() +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/26d9c58bd0ff95117e1d37aeb47512de')

# Ribbon plot data: population-weighted mean birth and death rate per continent.
ndf <- countries %>%
  group_by(continent) %>%
  summarise(
    birth.rate = weighted.mean(birth.rate, w = population, na.rm = TRUE),
    death.rate = weighted.mean(death.rate, w = population, na.rm = TRUE)
  )

# Two ribbons from 0 up to each rate; group = 1 joins the discrete continents
# into a single ribbon. The death-rate ribbon is drawn on top.
ggplot() +
  geom_ribbon(
    data = ndf,
    mapping = aes(
      x = continent,
      ymin = 0,
      y = birth.rate,
      ymax = birth.rate,
      group = 1
    ),
    fill = "green3"
  ) +
  geom_ribbon(
    data = ndf,
    mapping = aes(
      x = continent,
      ymin = 0,
      y = death.rate,
      ymax = death.rate,
      group = 1
    ),
    fill = "red3"
  ) +
  theme_bw() +
  xlab("") +
  ylab("birth.rate / death.rate")

Load: archivist::aread('pbiecek/Eseje/arepo/70366ebb679bdc0d6bee88b9056932b7')

Prostokąty

# Per-continent aggregates: population-weighted mean birth and death rates
# plus total population. Every aggregate uses na.rm = TRUE, so a missing
# population value does not turn the continent total into NA.
# The `population` total is computed last, so the weighted means above it
# still see the per-country populations.
continents <-
  countries %>%
  group_by(continent) %>%
  summarise(
    birth.rate = weighted.mean(birth.rate, w = population, na.rm = TRUE),
    death.rate = weighted.mean(death.rate, w = population, na.rm = TRUE),
    population = sum(population, na.rm = TRUE)
  )

# Rectangle: shade a fixed birth-rate band (12.38 to 27.85) behind the scatter.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_rect(
    xmin = 12.38,
    xmax = 27.85,
    ymin = 0,
    ymax = 18,
    alpha = 0.3,
    fill = "grey90"
  ) +
  geom_point() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/19bf57d4e6f50edc4e1a3d057912b044')

# Bar chart of the pre-computed birth rate per continent (bar height = value).
ggplot(data = continents, mapping = aes(x = continent, y = birth.rate)) +
  geom_bar(stat = "identity") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/59442be9ea10e26f0b817f6ad0dd0f31')

# Left edge of each continent's bar: cumulative population of the
# continents that come before it.
continents2 <- continents %>%
  mutate(cum = cumsum(population) - population)

# Variable-width bars: width = continent population, height = birth rate.
# Each rectangle runs from ymin = 0 up to ymax = birth.rate.
ggplot() +
  geom_rect(
    data = continents2,
    mapping = aes(
      xmin = cum,
      xmax = cum + population,
      ymin = 0,
      ymax = birth.rate,
      fill = continent
    )
  ) +
  # Label each bar at its horizontal centre, just above the top edge.
  geom_text(
    data = continents2,
    mapping = aes(label = continent, x = cum + population / 2, y = birth.rate),
    vjust = -0.1
  ) +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("population") +
  ylab("birth.rate")

Load: archivist::aread('pbiecek/Eseje/arepo/519b348ef25c8106739f6554d20efdd0')

# Hand-built paired bars per continent: a green bar from 0 to the birth rate
# and, beside it, a red bar hanging down from the birth rate by the death rate.
# Continents are placed at the integer codes of factor(continent).
ggplot() +
  geom_rect(
    data = continents,
    mapping = aes(
      xmin = as.numeric(factor(continent)) - 0.1,
      xmax = as.numeric(factor(continent)) + 0.2,
      ymin = 0,
      ymax = birth.rate
    ),
    fill = "green3"
  ) +
  geom_rect(
    data = continents,
    mapping = aes(
      xmin = as.numeric(factor(continent)) + 0.21,
      xmax = as.numeric(factor(continent)) + 0.51,
      ymin = birth.rate - death.rate,
      ymax = birth.rate
    ),
    fill = "red3"
  ) +
  # Continent name one unit above the top of the bars.
  geom_text(
    data = continents,
    mapping = aes(
      x = as.numeric(factor(continent)) + 0.21,
      y = birth.rate + 1,
      label = continent
    )
  ) +
  geom_hline(yintercept = 0) +
  ylab("birth.rate - death.rate") +
  xlab("") +
  # Rug marks on the left axis at the birth-minus-death differences.
  geom_rug(
    data = continents,
    mapping = aes(
      x = as.numeric(factor(continent)) + 0.21,
      y = birth.rate - death.rate
    ),
    sides = "l"
  ) +
  theme_bw() +
  # Numeric x positions carry no meaning, so hide the axis text and ticks.
  theme(
    axis.text.x = element_text(color = "white"),
    axis.ticks.x = element_line(color = "white")
  )

Load: archivist::aread('pbiecek/Eseje/arepo/ddd7f9aa123375309cd1ad9a96700df0')

# Number of countries per continent (bar height = row count).
ggplot(data = countries, mapping = aes(x = continent, fill = continent)) +
  geom_bar() +
  theme_bw() +
  theme(legend.position = "none") +
  xlab("") +
  ylab("Liczba krajów")

Load: archivist::aread('pbiecek/Eseje/arepo/7c0e64651d0f94435d6bb9af38289e92')

Linie

# Line geom: connects the observations in order of birth rate.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_line() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/c05f80f85c7e7a16ae69ebba0ea0275f')

# Smoothed trend only: the points are drawn fully transparent, leaving just
# the thick black smoother visible.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_point(colour = "white", alpha = 0) +
  geom_smooth(se = FALSE, size = 3, colour = "black") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/6f9b33857dc61785006596611b013acf')

# Three smoothers over faint points: a linear fit, a quadratic fit, and the
# default smoother with span 0.5.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_point(colour = "black", alpha = 0.3) +
  geom_smooth(
    se = FALSE, size = 2, colour = "red4",
    method = "lm", formula = y ~ poly(x, 1)
  ) +
  geom_smooth(
    se = FALSE, size = 2, colour = "red3",
    method = "lm", formula = y ~ poly(x, 2)
  ) +
  geom_smooth(se = FALSE, size = 2, colour = "red1", span = 0.5) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/fccfa878e6600c58e939979081d088b0')

# Arrows from birth rate to death rate for the European countries.
library(grid)

# Order the country factor by birth rate so the rows come out sorted.
countries$country <- reorder(countries$country, countries$birth.rate, mean)

ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    mapping = aes(
      x = country,
      xend = country,
      y = birth.rate,
      yend = death.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none") +
  ylab("<---- more births                more deaths ---->") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/dc15df3d9b42f188b488b5184f2813ae')

# Same arrows, coloured by whether the death rate exceeds the birth rate
# (FALSE -> green3, TRUE -> red3).
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    mapping = aes(
      x = country,
      xend = country,
      y = birth.rate,
      yend = death.rate,
      colour = death.rate > birth.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"))
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "top") +
  ylab("<---- more births                more deaths ---->") +
  xlab("") +
  scale_color_manual(
    values = c("green3", "red3"),
    labels = c("More births than deaths", "More deaths than births"),
    name = ""
  )

Load: archivist::aread('pbiecek/Eseje/arepo/6066f80bf716415d37f4f40cde479193')

# Same arrows with closed heads; segment thickness is mapped to population.
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    mapping = aes(
      x = country,
      xend = country,
      y = birth.rate,
      yend = death.rate,
      size = population
    ),
    arrow = arrow(length = unit(0.1, "cm"), type = "closed")
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none") +
  ylab("<---- more births                more deaths ---->") +
  xlab("")

Load: archivist::aread('pbiecek/Eseje/arepo/a3f4a11791537fd9d3ed8c3c22a825b1')

# Same arrows, with line type showing whether births exceed deaths.
ggplot() +
  geom_segment(
    data = countries[countries$continent == "Europe", ],
    mapping = aes(
      x = country,
      xend = country,
      y = birth.rate,
      yend = death.rate,
      linetype = birth.rate > death.rate
    ),
    arrow = arrow(length = unit(0.1, "cm"), type = "closed")
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "top") +
  ylab("<---- more births                more deaths ---->") +
  xlab("") +
  # The mapped condition is `birth.rate > death.rate`, so the first level is
  # FALSE (births do not exceed deaths) and the second is TRUE. Labels are
  # listed in that same order so the legend matches the lines.
  scale_linetype_manual(
    values = c(1, 2),
    labels = c("More deaths than births", "More births than deaths"),
    name = ""
  )

Load: archivist::aread('pbiecek/Eseje/arepo/1bb4e242a49297bf6a54fa64f5680416')

# Error bars: per continent, the min / max / population-weighted mean of the
# birth and death rates, plus total population.
conts <- countries %>%
  group_by(continent) %>%
  summarise(
    bmin = min(birth.rate, na.rm = TRUE),
    bmax = max(birth.rate, na.rm = TRUE),
    bmea = weighted.mean(birth.rate, w = population, na.rm = TRUE),
    dmin = min(death.rate, na.rm = TRUE),
    dmax = max(death.rate, na.rm = TRUE),
    dmea = weighted.mean(death.rate, w = population, na.rm = TRUE),
    population = sum(population, na.rm = TRUE)
  )

# Weighted means as points with vertical bars spanning the death-rate range.
# xmin/xmax are mapped too, but only a horizontal error bar would use them.
ggplot(
  data = conts,
  mapping = aes(
    x = bmea, y = dmea,
    ymin = dmin, ymax = dmax,
    xmin = bmin, xmax = bmax,
    colour = continent
  )
) +
  geom_point() +
  geom_errorbar(width = 0.5) +
  # Horizontal counterpart, currently disabled:
  #  geom_errorbarh(width=0.5) +
  theme_bw() +
  xlab("birth.rate") +
  ylab("death.rate") +
  theme(legend.position = "none")

Load: archivist::aread('pbiecek/Eseje/arepo/2c21b7e6a84f25312c9bc0f02575a273')

Inne geometrie

# Hexagonal binning (figure: geomBinHex.pdf): counts shaded white to black.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  stat_binhex(bins = 9) +
  scale_fill_gradient(low = "white", high = "black") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/eed22b09727397c0b8748dc1e254f949')

# Rug plot (figure: geomRug.pdf): marginal tick marks for both variables.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  geom_rug() +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/d4d0d1b2f1aeadc298e47253987c1975')

# Redefine `continents` as plain (unweighted) mean rates per continent.
continents <-
  countries %>%
  group_by(continent) %>%
  summarise(
    birth.rate = mean(birth.rate, na.rm = TRUE),
    death.rate = mean(death.rate, na.rm = TRUE)
  )

# Text geom (figure: geomText.pdf): continent names placed at their mean rates.
ggplot(
  data = continents,
  mapping = aes(x = birth.rate, y = death.rate, label = continent)
) +
  geom_text(alpha = 1) +
  theme_bw() +
  xlim(8, 35)

Load: archivist::aread('pbiecek/Eseje/arepo/7c1208e511986b6a8ac7b5b3518605d0')

Statystyki

# Box plot of birth rate per continent with coef = 3 for the whiskers.
ggplot(data = countries, mapping = aes(x = continent, y = birth.rate)) +
  geom_boxplot(fill = "grey", coef = 3) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/de2382fd045555f9409cbce2896e6f8b')

# Crossbar per continent summarised with "mean_cl_boot".
ggplot(data = countries, mapping = aes(x = continent, y = birth.rate)) +
  stat_summary(fun.data = "mean_cl_boot", geom = "crossbar", width = 0.3) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/4e3e8969f3d6e57855af56396be06f0c')

# Arrows spanning the quartiles of the birth rate per continent.
library(tidyr)
library(grid)

# Wide table: one row per continent with the three quartiles as columns.
cq <- countries %>%
  group_by(continent) %>%
  summarise(
    q1 = quantile(birth.rate, probs = 0.25, na.rm = TRUE),
    q2 = quantile(birth.rate, probs = 0.5, na.rm = TRUE),
    q3 = quantile(birth.rate, probs = 0.75, na.rm = TRUE)
  )

# Long table: one row per continent and quartile (columns key / value).
cq13 <- cq %>%
  gather(key, value, -continent)

# Figure statQ1.pdf: a double-headed arrow through each continent's
# quartiles, with a large point at the median (q2).
ggplot(data = cq13, mapping = aes(x = continent, y = value, group = continent)) +
  geom_path(arrow = arrow(ends = "both")) +
  geom_point(
    data = cq13[cq13$key == "q2", ],
    mapping = aes(x = continent, y = value),
    size = 4
  ) +
  theme_bw() +
  xlab("") +
  ylab("Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/087e55b1802f862d49c358658fc42cbb')

# Figure statQ2.pdf: boxes built directly from the pre-computed quartiles.
# The whisker ends (ymin / ymax) coincide with the box edges (q1 / q3).
ggplot(data = cq, mapping = aes(x = continent, y = q2)) +
  geom_boxplot(
    mapping = aes(ymin = q1, lower = q1, middle = q2, upper = q3, ymax = q3),
    stat = "identity"
  ) +
  theme_bw() +
  xlab("") +
  ylab("Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/228eed14d8b5f88d69ee19fda3dbf42c')

# Figure statQ3.pdf: error bars from q1 to q3 with an asterisk at the median.
ggplot(data = cq, mapping = aes(x = continent, y = q2)) +
  geom_errorbar(mapping = aes(ymin = q1, ymax = q3), stat = "identity", width = 0.3) +
  geom_text(label = "*", size = 15) +
  theme_bw() +
  xlab("") +
  ylab("Kwartyle i mediana dla wsp. urodzin")

Load: archivist::aread('pbiecek/Eseje/arepo/d939d7c448683d337e95b9cfe8b3f93c')

# Contour lines of the 2D kernel density (bandwidth 10 in both directions).
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  geom_density2d(h = c(10, 10), colour = "grey") +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/d04db533e2252ce6af66a11ee2c00c27')

# The same 2D density drawn as semi-transparent filled polygons.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(h = c(10, 10), geom = "polygon", alpha = 0.25) +
  theme_bw()

Load: archivist::aread('pbiecek/Eseje/arepo/0c77057670bfe2e0510d2b454bd27f15')

# The 2D density as a raster of tiles (no contouring), shaded white to black.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(
    h = c(10, 10),
    geom = "tile",
    mapping = aes(fill = ..density..),
    contour = FALSE
  ) +
  scale_fill_gradient(low = "white", high = "black") +
  theme_minimal()

Load: archivist::aread('pbiecek/Eseje/arepo/3ecbf4864e8e78a32659990785ef9e57')

# The 2D density on a 50 x 15 grid, drawn as points whose size is the density.
ggplot(data = countries, mapping = aes(x = birth.rate, y = death.rate)) +
  coord_fixed() +
  xlim(0, 50) +
  ylim(0, 16) +
  stat_density2d(
    h = c(10, 10),
    n = c(50, 15),
    geom = "point",
    mapping = aes(size = ..density..),
    contour = FALSE
  ) +
  scale_size_continuous(range = c(0, 2)) +
  theme_minimal()

Load: archivist::aread('pbiecek/Eseje/arepo/4becd71bfca1970acc67bcf0b7f3bdb5')

Modyfikatory położenia

Układ współrzędnych

Miary / podziałki / skale

Panele / oblicza

Dekoracje